library(tidyverse)
library(haven)
library(writexl)
library(foreign)
library(readxl)
library(dplyr)
library(Hmisc)
library(ggplot2)
library(dataverse)
library(tibble)

################## Loading Dataframes from Harvard Dataserve ############################

experiment2023 <- get_dataframe_by_name(
  filename = "Lastingpopulistsupport_Experimento_20marzo2023_Pichincha.tab",
  dataset = "10.7910/DVN/EU0CEB", 
  server = "dataverse.harvard.edu")

electoral2023 <- get_dataframe_by_name(
  filename = "Lastingpopulistsupport_Encuesta_7oct2023_Quito.tab",
  dataset = "10.7910/DVN/EU0CEB", 
  server = "dataverse.harvard.edu")

# For downloading the dataframes and codebooks in one's own computer, please visit: ###########
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi%3A10.7910%2FDVN%2FEU0CEB ###########

################## LOADING THE OTHER DATAFRAMES FROM A LOCAL FOLDER ############################

setwd('folder________')
load("Lastingpopulistsupport_Experimento_6junio2024_Ecuador.RData")
load("Lastingpopulistsupport_Encuesta_marzo2022_Ecuador.RData")

################## ANALYZING THE SURVEY EXPERIMENTS ############################

##Cleaning the data
#Only taking into account Quito for the purpose of this analysis

describe(experiment2023$canton)
experiment2023 <- subset(experiment2023, experiment2023$canton==60)

##Preparing the variables
# joining DV = presi
experiment2023 <- experiment2023 %>%
  mutate(presi = ifelse(is.na(presi_a), as.character(presi_b), 
                                  ifelse(is.na(presi_b), as.character(presi_a), 
                                         paste(presi_a, presi_b))))
experiment2023$presi <- as.numeric(experiment2023$presi)
experiment2023$sec <- as.numeric(experiment2023$sec)
describe(experiment2023$presi)

#Dichotomic DV = presi_two
experiment2023 <- experiment2023 %>%
  mutate(presi_two = ifelse(presi == 5, 1, 0))
describe(experiment2023$presi_two)

# joining DV = presi
experiment2024 <- experiment2024 %>%
  mutate(presi = ifelse(is.na(v38a), as.character(v38b), 
                        ifelse(is.na(v38b), as.character(v38a), 
                               paste(v38a, v38b))))
experiment2024$presi <- as.numeric(experiment2024$presi)
describe(experiment2024$presi)

experiment2024$formulario <- experiment2024$v38

#Dichotomic DV = presi_two
experiment2024 <- experiment2024 %>%
  mutate(presi_two = ifelse(presi == 6, 1, 0))
describe(experiment2024$presi_two)

### FIRST EXPLORATION ####
#Voting behaviour in the first experiment#
contingency_table1 <- table(experiment2023$presi, experiment2023$formulario)
print(contingency_table1) 
percentages1 <- prop.table(contingency_table1, margin = 2) * 100
print(percentages1)

ggplot(data=experiment2023,aes(x=as.factor(presi),fill=as.factor(formulario))) + 
  geom_bar(data=subset(experiment2023,formulario==1)) + 
  geom_bar(data=subset(experiment2023,formulario==2),aes(y=..count..*(-1))) + 
  coord_flip()

#Voting behaviour in the second experiment#
contingency_table2 <- table(experiment2024$presi, experiment2024$formulario)
print(contingency_table2) 
percentages2 <- prop.table(contingency_table2, margin = 2) * 100
print(percentages2)

ggplot(data=experiment2024,aes(x=as.factor(presi),fill=as.factor(formulario))) + 
  geom_bar(data=subset(experiment2024,formulario==1)) + 
  geom_bar(data=subset(experiment2024,formulario==2),aes(y=..count..*(-1))) + 
  coord_flip()


################## PART 1: EXPERIMENTS ON VOTING INTENTION ############################

### VARIABLES OF THE FIRST MODEL ###

# IV = formulario 
# DV = presi & presi_two
# Socioeconomic variables = edad, parroquia, ing, trab, ocu, sexo, edad, edu, etnia
# Political variables = califica_presidente, elecciones_anticipad, indigenas, destituir

# RANDOMIZATION CHECKS ON SOCIOECONOMIC AND POLITICAL VARIABLES THROUGH 
# T-TEST FOR NUMERIC VARIABLES AND WILCOX TEST FOR ORDINAL VARIABLES

#age(numeric)
edadtest <- t.test(edad ~ formulario, data = experiment2023)
print(edadtest) 
#income(ordinal)
ingtest <- t.test(ing ~ formulario, data = experiment2023)
print(ingtest) 
#socioeconomic(ordinal)
sectest <- t.test(sec ~ formulario, data = experiment2023)
print(sectest) 

#executive approval(ordinal)
califtest <- wilcox.test(califica_presidente ~ formulario, data = experiment2023)
print(califtest)
#want to anticipate presidential elections(ordinal)
antictest <- wilcox.test(elecciones_anticipad ~ formulario, data = experiment2023)
print(antictest)
#approve indigenous protests(ordinal)
indigtest <- wilcox.test(indigenas ~ formulario, data = experiment2023)
print(indigtest)
#want to remove the president from power(ordinal)
destitest <- wilcox.test(destituir ~ formulario, data = experiment2023)
print(destitest)

# CHI-SQUARE TEST FOR NOMINAL 

#gender(categorial)
gentest <- table(experiment2023$formulario, experiment2023$sexo)
chisq.test(gentest)
#territory(nominal)
parrtest <- table(experiment2023$formulario, experiment2023$parroquia)
chisq.test(parrtest) 
#work(categorial)
trabtest <- table(experiment2023$formulario, experiment2023$trab)
chisq.test(trabtest) 
#occupation(categorial)
ocutest <- table(experiment2023$formulario, experiment2023$ocu)
chisq.test(ocutest) 
#ethnic group(categorial)
etniatest <- table(experiment2023$formulario, experiment2023$etnia)
chisq.test(etniatest) 

##randomization tests: 12 out of 12 do not present statistically significant differences between the two groups


### VARIABLES OF THE SECOND MODEL ###

# IV = formulario 
# DV = presi & presi_two
# Socioeconomic variables = V6, V2, V39, V7, V8, V5, V14
# Political variables = V36, V37

#age(numeric)
edadtest2 <- t.test(v6 ~ formulario, data = experiment2024)
print(edadtest2) 
#socioeconomic level(ordinal)
ingtest2 <- wilcox.test(v39 ~ formulario, data = experiment2024)
print(ingtest2) 
#executive approval(ordinal)
califtest2 <- wilcox.test(v36 ~ formulario, data = experiment2024)
print(califtest2)

#gender(categorial)
gentest2 <- table(experiment2024$formulario, experiment2024$v5)
chisq.test(gentest2)
#territory(nominal)
parrtest2 <- table(experiment2024$formulario, experiment2024$v2)
chisq.test(parrtest2) 
#work(categorial)
trabtest2 <- table(experiment2024$formulario, experiment2024$v7)
chisq.test(trabtest2) 
#occupation(categorial)
ocutest2 <- table(experiment2024$formulario, experiment2024$v8)
chisq.test(ocutest2) 

##7 out of 7 do not present statistically significant differences between the two groups



## LOGISTIC REGRESSION TO CALCULATE THE IMPACT OF THE LEADER'S PRESENCE IN THE BALLOT ##

#creating variable 'treatment'
experiment2023 <- experiment2023%>%
  mutate (treatment = case_when(formulario == 1 ~ 1,
                                formulario != 1 ~ 0))
describe(experiment2023$treatment)
experiment2024 <- experiment2024%>%
  mutate (treatment = case_when(formulario == 1 ~ 1,
                                formulario != 1 ~ 0))
describe(experiment2024$treatment)

##2023##

#Voting with all candidates(categorical)
vote1 <- wilcox.test(presi ~ formulario, data = experiment2023)
print(vote1)  
#Voting dicotomic(categorical)
voted1 <- wilcox.test(presi_two ~ formulario, data = experiment2023)
print(voted1)  


#all variables complete and ordered
experiment2023 <- subset(experiment2023, experiment2023$califica_presidente!=0)
experiment2023 <- subset(experiment2023, experiment2023$ing!=0)

describe(experiment2023$sec)
describe(experiment2023$edad)
describe(experiment2023$sexo)
describe(experiment2023$califica_presidente)


#Logistic regression of first experiment
  modelb <- glm(presi_two ~ treatment, data = experiment2023, family = binomial)
  summary(modelb)
  predicted_probabilities <- predict(modelb, type = "response")
  data_to_plot <- data.frame(treatment = experiment2023$treatment, 
                             predicted_probabilities = predicted_probabilities)
  ggplot(data_to_plot, aes(x = treatment, y = predicted_probabilities)) +
    scale_x_discrete()+
    geom_smooth(method = "glm", method.args = list(family = "binomial")) +
    labs(x = "treatment", y = "Predicted Probability of presi_two")

#Logistic regression with control variables
  
  model2 <- glm(presi_two ~ treatment + sec + sexo + edad + califica_presidente, data = experiment2023, family = binomial)
  summary(model2)
  predicted_probabilities <- predict(model2, type = "response")
  data_to_plot <- data.frame(treatment = experiment2023$treatment, 
                             predicted_probabilities = predicted_probabilities)
  ggplot(data_to_plot, aes(x = treatment, y = predicted_probabilities)) +
    geom_smooth(method = "glm", method.args = list(family = "binomial"))+
    scale_x_discrete()+
    labs(x = "treatment", y = "Predicted Probability of presi_two") +
    theme_bw()

  exp(cbind(OR = coef(model2), confint(model2)))
  

  
##2024##

#Voting with all candidates(categorical)
  vote2 <- wilcox.test(presi ~ formulario, data = experiment2024)
  print(vote2)  
#Voting dicotomic(categorical)
  voted2 <- wilcox.test(presi_two ~ formulario, data = experiment2024)
  print(voted2)

  
#all variables complete and ordered
experiment2024$gen <- experiment2024$v5
experiment2024$edad <- experiment2024$v6
experiment2024$sec <- experiment2024$v39
experiment2024$califica_presidente <- experiment2024$v36
experiment2024 <- subset(experiment2024, experiment2024$califica_presidente!=0)
  
describe(experiment2024$sec)
describe(experiment2024$edad)
describe(experiment2024$sexo)
describe(experiment2024$califica_presidente)
  
        
#Logistic regression of second experiment
  modelb <- glm(presi_two ~ treatment, data = experiment2024, family = binomial)
  summary(modelb)
  predicted_probabilities <- predict(modelb, type = "response")
  data_to_plot <- data.frame(treatment = experiment2024$treatment, 
                             predicted_probabilities = predicted_probabilities)
  ggplot(data_to_plot, aes(x = treatment, y = predicted_probabilities)) +
    scale_x_discrete()+
    geom_smooth(method = "glm", method.args = list(family = "binomial")) +
    labs(x = "treatment", y = "Predicted Probability of presi_two")

#Logistic regression with control variables
  
  model3 <- glm(presi_two ~ treatment + sec + gen + edad + califica_presidente, data = experiment2024, family = binomial)
  summary(model3)
  predicted_probabilities <- predict(model3, type = "response")
  data_to_plot <- data.frame(treatment = experiment2024$treatment, 
                             predicted_probabilities = predicted_probabilities)
  ggplot(data_to_plot, aes(x = treatment, y = predicted_probabilities)) +
    geom_smooth(method = "glm", method.args = list(family = "binomial"))+
    scale_x_discrete()+
    labs(x = "treatment", y = "Predicted Probability of presi_two") +
    theme_bw()
  



############ PART 2: FEELING THERMOMETERS VERSUS SELF-IDENTIFICATION SCALES #############

### Dropping invalid observations ###

electoral2022 <- electoral2022[!(is.na(electoral2022$therm_1)),] 
electoral2022 <- electoral2022[!(is.na(electoral2022$vote_r2)),] 
electoral2022$correismo <- as.numeric(electoral2022$correismo)
summary(electoral2022$correismo)
describe(electoral2022$correismo)

electoral2023 <- subset(electoral2023, electoral2023$correismo!=0) 
electoral2023 <- subset(electoral2023, electoral2023$correa!=12) 
describe(electoral2023$correismo)
describe(electoral2023$correa)

### Relabelling and standardizing variables ###

electoral2022$correa <- electoral2022$therm_1
describe(electoral2022$correa)

electoral2022 <- electoral2022%>%
  mutate (correa_feeling = case_when(correa <= 19 ~ 1,
                                     correa > 19 & correa < 40 ~ 2,
                                     correa >= 40 & correa <= 60 ~ 3,
                                     correa > 60 & correa < 81 ~ 4,
                                     correa >= 81  ~ 5))
describe(electoral2022$correa_feeling)

electoral2023 <- electoral2023%>%
  mutate (correa_feeling = case_when(correa <= 1 ~ 1,
                                     correa > 1 & correa < 4 ~ 2,
                                     correa >= 4 & correa <= 6 ~ 3,
                                     correa > 6 & correa < 9 ~ 4,
                                     correa >= 9  ~ 5))
describe(electoral2023$correa_feeling)

electoral2023$vote_r1 <- electoral2023$primera
electoral2023$vote_r2 <- electoral2023$presi

electoral2022 <- electoral2022%>%
  mutate (votechoice1 = case_when(vote_r1 == 2 ~ 1,
                                  vote_r1 == 1 ~ 2,
                                  vote_r1 == 3 | vote_r1 == 4 ~ 3,
                                  vote_r1 > 4 ~ 4))
describe(electoral2022$votechoice1)

electoral2022 <- electoral2022%>%
  mutate (votechoice2 = case_when(vote_r2 == 1 ~ 1,
                                  vote_r2 == 2 ~ 2,
                                  vote_r2 > 2 ~ 3))
describe(electoral2022$votechoice2)

describe(electoral2023$vote_r1)
electoral2023 <- electoral2023%>%
  mutate (votechoice1 = case_when(vote_r1 == 5 ~ 1,
                                  vote_r1 == 4 ~ 2,
                                  vote_r1 == 2 | vote_r1 > 5 & vote_r1 < 34 ~ 3,
                                  vote_r1 == 0 | vote_r1 > 96 ~ 4))
describe(electoral2023$votechoice1)

electoral2023 <- electoral2023%>%
  mutate (votechoice2 = case_when(vote_r2 == 5 ~ 1,
                                  vote_r2 == 4 ~ 2,
                                  vote_r2 == 0 | vote_r2 > 96 ~ 3))
describe(electoral2023$votechoice2)

electoral2023 <- electoral2023%>%
  mutate (correismo = case_when(correismo == 5 ~ 1,
                                correismo == 4 ~ 2,
                                correismo == 3 ~ 3,
                                correismo == 2 ~ 4,
                                correismo == 1 ~ 5))
describe(electoral2023$correismo)

### Variables chosen for the analysis ###

# Independent variables
## correismo: self-identification scale
## correa_feeling: standardized feeling scale
# Dependent variales
## votechoice1: vote choice in the first round
## votechoice2: vote choice in the second round

#1: very anticorreista, 5: very correista
describe(electoral2022$correismo)
describe(electoral2023$correismo)
describe(electoral2022$correa_feeling)
describe(electoral2023$correa_feeling)

#1: Correa's candidate, 2: main rival, 3. other candidates, 4: not valid votes
describe(electoral2022$votechoice1)
describe(electoral2023$votechoice1)

### Weighting based on first round real results in UIo/GYE2021 and UIO2023

electoral2022 <- electoral2022%>%
  mutate (weight_r1 = case_when(votechoice1 == 1 ~ 950359/630,
                               votechoice1 == 2 ~ 861486/718,
                               votechoice1 == 3 ~ 1262530/699,
                               votechoice1 == 4 ~ 1174056/815))
describe(electoral2022$weight_r1)

electoral2023 <- electoral2023%>%
  mutate (weight_r1 = case_when(votechoice1 == 1 ~ 394598/294,
                                votechoice1 == 2 ~ 374794/253,
                                votechoice1 == 3 ~ 773915/391,
                                votechoice1 == 4 ~ 470697/295))
describe(electoral2023$weight_r1)

#wilcox.test Test self-identification scales and feeling thermometers
#in order to compare the means and the shape of distributions
wtest1 <- wilcox.test(electoral2022$correismo, electoral2022$correa_feeling, paired = TRUE)
print(wtest1) 
wtest2 <- wilcox.test(electoral2023$correismo, electoral2023$correa_feeling, paired = TRUE)
print(wtest2) 



##Separating self-identified correistas from affectively attached correistas
# 5: self-identified correista, 4: merely affectively attached to Correa
# 3: neutral, 2: merely affectively opposed to Correa
# 1: self-identified anticorreista
electoral2022 <- electoral2022%>%
  mutate (partisans = case_when(correismo == 5 | correismo == 4 ~ 5,
                                correismo == 1 | correismo == 2 ~ 1,
                                correa_feeling == 5 & correismo != 5 ~ 4, 
                                correa_feeling == 5 & correismo != 4 ~ 4,
                                correa_feeling == 4 & correismo != 5 ~ 4, 
                                correa_feeling == 4 & correismo != 4 ~ 4,
                                correa_feeling == 1 & correismo != 1 ~ 2, 
                                correa_feeling == 1 & correismo != 2 ~ 2,
                                correa_feeling == 2 & correismo != 1 ~ 2, 
                                correa_feeling == 2 & correismo != 2 ~ 2))
electoral2022 <- electoral2022 %>%
  mutate(partisans = ifelse(is.na(partisans), 3, partisans))

electoral2023 <- electoral2023%>%
  mutate (partisans = case_when(correismo == 5 | correismo == 4 ~ 5,
                                correismo == 1 | correismo == 2 ~ 1,
                                correa_feeling == 5 & correismo != 5 ~ 4, 
                                correa_feeling == 5 & correismo != 4 ~ 4,
                                correa_feeling == 4 & correismo != 5 ~ 4, 
                                correa_feeling == 4 & correismo != 4 ~ 4,
                                correa_feeling == 1 & correismo != 1 ~ 2, 
                                correa_feeling == 1 & correismo != 2 ~ 2,
                                correa_feeling == 2 & correismo != 1 ~ 2, 
                                correa_feeling == 2 & correismo != 2 ~ 2))
electoral2023 <- electoral2023 %>%
  mutate(partisans = ifelse(is.na(partisans), 3, partisans))


describe(electoral2022$partisans)
describe(electoral2023$partisans)


#weighted frequency tables
weighted_freqtable1 <- xtabs(weight_r1 ~ correismo, data = electoral2022)
perc1 <- prop.table(weighted_freqtable1) * 100
weighted_freqtable2 <- xtabs(weight_r1 ~ correismo, data = electoral2023)
perc2 <- prop.table(weighted_freqtable2) * 100
weighted_freqtable3 <- xtabs(weight_r1 ~ correa_feeling, data = electoral2022)
perc3 <- prop.table(weighted_freqtable3) * 100
weighted_freqtable4 <- xtabs(weight_r1 ~ correa_feeling, data = electoral2023)
perc4 <- prop.table(weighted_freqtable4) * 100

print(perc1)
print(perc2)
print(perc3)
print(perc4)


#weighted cross table transfers
weighted_transfer1 <- xtabs(weight_r1 ~ correismo + correa_feeling , data = electoral2022)
transfer1 <- prop.table(weighted_transfer1) * 100
print(transfer1)
weighted_transfer2 <- xtabs(weight_r1 ~ correismo + correa_feeling , data = electoral2023)
transfer2 <- prop.table(weighted_transfer2) * 100
print(transfer2)


#weighted cross tables between vote choice and self-identification scale
weighted_table1 <- xtabs(weight_r1 ~ correismo + votechoice1 , data = electoral2022)
table_perc1 <- prop.table(weighted_table1, margin=1) * 100
print(table_perc1)
weighted_table2 <- xtabs(weight_r1 ~ correismo + votechoice1, data = electoral2023)
table_perc2 <- prop.table(weighted_table2, margin=1) * 100
print(table_perc2)


#weighted cross tables between vote choice and feeling thermometer
weighted_table3 <- xtabs(weight_r1 ~ correa_feeling + votechoice1 , data = electoral2022)
table_perc3 <- prop.table(weighted_table3, margin=1) * 100
print(table_perc3)
weighted_table4 <- xtabs(weight_r1 ~ correa_feeling + votechoice1, data = electoral2023)
table_perc4 <- prop.table(weighted_table4, margin=1) * 100
print(table_perc4)


#weighted cross tables between vote choice and correístas partisans
weighted_table5 <- xtabs(weight_r1 ~ partisans + votechoice1 , data = electoral2022)
table_perc5 <- prop.table(weighted_table5, margin=1) * 100
print(table_perc5)
weighted_table6 <- xtabs(weight_r1 ~ partisans + votechoice1, data = electoral2023)
table_perc6 <- prop.table(weighted_table6, margin=1) * 100
print(table_perc6)
